import numpy as np

import pandas as pd
# Load the raw Titanic passenger table from the working directory.
df = pd.read_csv(filepath_or_buffer="Titanic-Dataset.csv")
# Optional diagnostics, left disabled:
# print(df.shape)
# print(df.info())
# # Summary statistics for the numeric Age and Fare columns
# print(df[["Age","Fare"]].describe())
import random
# Show one randomly chosen passenger as a sanity check on the loaded data.
# randrange(len(df)) yields a position in 0..len(df)-1, so it is valid for
# whatever number of rows was actually read. The previous hard-coded
# randint(0, 891) has an inclusive upper bound and could step past the
# last row.
rID = random.randrange(len(df))
# Positional lookup: the drawn value is a row position, not an index label.
print(df.iloc[rID])
# Count of missing values per column before any cleaning.
print(df.isna().sum())
# Impute missing ages: within each passenger class (Pclass), replace NaN
# with the mean age of that class.
df["Age"] = (
    df.groupby("Pclass")["Age"]
    .transform(lambda class_ages: class_ages.fillna(class_ages.mean()))
)
# Re-check the per-column missing counts after the Age imputation.
print(df.isnull().sum())
# Cabin: mark missing entries with the placeholder string "N".
df["Cabin"] = df["Cabin"].fillna(value="N")
# Re-check the per-column missing counts after the Cabin fill.
print(df.isnull().sum())
# Embarked: first display the rows whose port of embarkation is missing.
print(df.loc[df["Embarked"].isnull()])
# Number of first-class (Pclass == 1) passengers per embarkation port.
print(df.loc[df["Pclass"].eq(1)].groupby("Embarked").size())
# Fill the missing ports with "S".
# NOTE(review): presumably chosen from the counts printed above - confirm
# against the actual output.
df["Embarked"] = df["Embarked"].fillna(value="S")
# Final check of the per-column missing counts.
print(df.isnull().sum())
# Write the cleaned dataset to a new CSV file, omitting the index column.
df.to_csv(path_or_buf="Titanic-Dataset-Cleared.csv", index=False)
